{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  本周目标\n",
    "#### 1.观察3变量+关键词的URL\n",
    "#### 2.解析URL参数\n",
    "#### 3.创建payload模板\n",
    "#### 4.xpath解析页数据\n",
    "#### 5.翻页\n",
    "##### 注：Ctrl+/ 可以批量注释"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'互联网/电商': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=040&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '游戏产业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=420&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '计算机软件': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=010&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " 'IT服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=030&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '电子/芯片/半导体': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=050&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '通信业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=060&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '计算机/网络设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=020&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '房地产/建筑': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=080&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '规划/设计/装潢': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=100&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '房地产服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=090&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '银行': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=130&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '保险': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=140&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '基金/证券/投资': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=150&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '会计/审计': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=430&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '信托/担保/拍卖': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=500&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '快消品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=190&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '批发零售': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=240&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '服装纺织': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=200&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '家具/家电': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=210&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '办公设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=220&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '奢侈品/收藏品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=460&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '珠宝/玩具/工艺品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=470&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '汽车/摩托车': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=350&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '机械/机电/重工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=360&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '印刷/包装/造纸': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=180&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '原材料加工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=370&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '仪器/电气/自动化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=340&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '制药/生物工程': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=270&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '医疗/保健/美容': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=280&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '医疗器械': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=290&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '能源/水利': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=330&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '石油/化工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=310&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '采掘/冶炼/矿产': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=320&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '环保': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=300&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '新能源': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=490&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '专业服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=120&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '中介服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=110&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '外包服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=440&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '检测/认证': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=450&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '餐饮/酒旅/服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=230&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '文体娱乐': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=260&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '租赁服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=510&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '广告/市场/会展': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=070&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '影视文化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=170&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '教育培训': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=380&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '交通/物流/运输': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=250&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '贸易/进出口': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=160&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '航空/航天': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=480&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '政务/公共服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=390&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '农林牧渔': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=410&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd',\n",
       " '其他行业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=400&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd'}"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "# Job-search landing page of liepin.com; this cell scrapes its sub-industry links.\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "session = HTMLSession()\n",
    "# The timeout (seconds) keeps the kernel from hanging forever if the site stalls;\n",
    "# raise_for_status() surfaces HTTP error responses instead of silently parsing\n",
    "# an error page into an empty dict.\n",
    "r = session.get(url, timeout=10)\n",
    "r.raise_for_status()\n",
    "\n",
    "# Select the <a> children of the sub-industry container.\n",
    "# Original markup: <div class=\"sub-industry\" style=\"top: 38px;\">\n",
    "行业细分 = r.html.xpath(\"//div[@class='sub-industry']/a\")\n",
    "# Map the first text node of each anchor to its first href value.\n",
    "行业字典 = {\n",
    "    link.xpath(\"a/text()\")[0]: link.xpath(\"a/@href\")[0]\n",
    "    for link in 行业细分\n",
    "}\n",
    "行业字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=040&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=420&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=010&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_01&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=030&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_02&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=050&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_02&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=060&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_02&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=020&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_03&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=080&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_03&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=100&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_03&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=090&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=130&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=140&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=150&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=430&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_04&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=500&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=190&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=240&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=200&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=210&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=220&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=460&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_05&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=470&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=350&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=360&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=180&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=370&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_06&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=340&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_10&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=270&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_10&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=280&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_10&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=290&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=330&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=310&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=320&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=300&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_11&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=490&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=120&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=110&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=440&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=450&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=230&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=260&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_07&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=510&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_08&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=070&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_08&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=170&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_08&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=380&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_09&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=250&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_09&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=160&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_09&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=480&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_12&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=390&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_12&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=410&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='subIndustry=&init=-1&industryType=industry_12&headckid=db303480df20d2d6&flushckid=1&fromSearchBtn=2&industries=400&ckid=db303480df20d2d6&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=99fe38384bc346b495ceff0e7d5359bd&d_curPage=0&d_pageSize=40&d_headId=99fe38384bc346b495ceff0e7d5359bd', fragment='')]"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split every industry URL into its six components with urllib.parse.\n",
    "from urllib.parse import urlparse, parse_qs\n",
    "\n",
    "list(map(urlparse, 行业字典.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 51 entries, 0 to 50\n",
      "Data columns (total 6 columns):\n",
      "scheme      51 non-null object\n",
      "netloc      51 non-null object\n",
      "path        51 non-null object\n",
      "params      51 non-null object\n",
      "query       51 non-null object\n",
      "fragment    51 non-null object\n",
      "dtypes: object(6)\n",
      "memory usage: 2.5+ KB\n",
      "scheme       1\n",
      "netloc       1\n",
      "path         1\n",
      "params       1\n",
      "query       51\n",
      "fragment     1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>subIndustry=&amp;init=-1&amp;industryType=industry_01&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  subIndustry=&init=-1&industryType=industry_01&...           "
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# One row per industry URL, one column per ParseResult field.\n",
    "df = pd.DataFrame(list(map(urlparse, 行业字典.values())))\n",
    "\n",
    "# Dtypes and null counts first, then the number of distinct values in each URL component.\n",
    "df.info()\n",
    "print(df.nunique())\n",
    "df.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init              1\n",
      "industryType     12\n",
      "headckid          1\n",
      "flushckid         1\n",
      "fromSearchBtn     1\n",
      "industries       51\n",
      "ckid              1\n",
      "siTag             1\n",
      "d_sfrom           1\n",
      "d_ckId            1\n",
      "d_curPage         1\n",
      "d_pageSize        1\n",
      "d_headId          1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>industryType</th>\n",
       "      <th>industries</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>420</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>industry_02</td>\n",
       "      <td>050</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>industry_02</td>\n",
       "      <td>060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>industry_02</td>\n",
       "      <td>020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>industry_03</td>\n",
       "      <td>080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>industry_03</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>industry_03</td>\n",
       "      <td>090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>430</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>21</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>470</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>22</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>23</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>24</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>25</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>26</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>27</td>\n",
       "      <td>industry_10</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>28</td>\n",
       "      <td>industry_10</td>\n",
       "      <td>280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>29</td>\n",
       "      <td>industry_10</td>\n",
       "      <td>290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>30</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>31</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>32</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>33</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>34</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>35</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>36</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>37</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>38</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>39</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>260</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>41</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>510</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>42</td>\n",
       "      <td>industry_08</td>\n",
       "      <td>070</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>43</td>\n",
       "      <td>industry_08</td>\n",
       "      <td>170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>44</td>\n",
       "      <td>industry_08</td>\n",
       "      <td>380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>45</td>\n",
       "      <td>industry_09</td>\n",
       "      <td>250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>46</td>\n",
       "      <td>industry_09</td>\n",
       "      <td>160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>47</td>\n",
       "      <td>industry_09</td>\n",
       "      <td>480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>48</td>\n",
       "      <td>industry_12</td>\n",
       "      <td>390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>49</td>\n",
       "      <td>industry_12</td>\n",
       "      <td>410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>50</td>\n",
       "      <td>industry_12</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   industryType industries\n",
       "0   industry_01        040\n",
       "1   industry_01        420\n",
       "2   industry_01        010\n",
       "3   industry_01        030\n",
       "4   industry_02        050\n",
       "5   industry_02        060\n",
       "6   industry_02        020\n",
       "7   industry_03        080\n",
       "8   industry_03        100\n",
       "9   industry_03        090\n",
       "10  industry_04        130\n",
       "11  industry_04        140\n",
       "12  industry_04        150\n",
       "13  industry_04        430\n",
       "14  industry_04        500\n",
       "15  industry_05        190\n",
       "16  industry_05        240\n",
       "17  industry_05        200\n",
       "18  industry_05        210\n",
       "19  industry_05        220\n",
       "20  industry_05        460\n",
       "21  industry_05        470\n",
       "22  industry_06        350\n",
       "23  industry_06        360\n",
       "24  industry_06        180\n",
       "25  industry_06        370\n",
       "26  industry_06        340\n",
       "27  industry_10        270\n",
       "28  industry_10        280\n",
       "29  industry_10        290\n",
       "30  industry_11        330\n",
       "31  industry_11        310\n",
       "32  industry_11        320\n",
       "33  industry_11        300\n",
       "34  industry_11        490\n",
       "35  industry_07        120\n",
       "36  industry_07        110\n",
       "37  industry_07        440\n",
       "38  industry_07        450\n",
       "39  industry_07        230\n",
       "40  industry_07        260\n",
       "41  industry_07        510\n",
       "42  industry_08        070\n",
       "43  industry_08        170\n",
       "44  industry_08        380\n",
       "45  industry_09        250\n",
       "46  industry_09        160\n",
       "47  industry_09        480\n",
       "48  industry_12        390\n",
       "49  industry_12        410\n",
       "50  industry_12        400"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# `query` is the URL component with 51 distinct values, so split it again: one column per query parameter.\n",
    "# parse_qs maps every parameter name to a *list* of values (and, by default, omits parameters whose value\n",
    "# is blank); keep the first value so that each cell holds a plain string instead of a one-element list.\n",
    "df_qs = pd.DataFrame([\n",
    "    {name: values[0] for name, values in parse_qs(query).items()}\n",
    "    for query in df['query']\n",
    "])\n",
    "\n",
    "# Distinct values per parameter, then the two parameter columns of interest.\n",
    "print(df_qs.nunique())\n",
    "df_qs[['industryType', 'industries']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "edu         5\n",
      "经验          5\n",
      "薪水         68\n",
      "时间         21\n",
      "职称        181\n",
      "公司地点       74\n",
      "公司名称       70\n",
      "链结        201\n",
      "公司URL      72\n",
      "热门公司类型      6\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>职称</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>公司名称</th>\n",
       "      <th>edu</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>阿里巴巴</td>\n",
       "      <td>学历不限</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>天能电池集团股份有限公司</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>网易集团</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td rowspan=\"2\" valign=\"top\">科大讯飞</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>本科及以上</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>深圳市丰巢科技有限公司</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>深圳市优必选科技股份有限公司</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>深圳视见医疗科技有限公司</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>猎聘</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>麒麟合盛网络技术股份有限公司</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      职称\n",
       "公司名称           edu      \n",
       "阿里巴巴           学历不限   17\n",
       "天能电池集团股份有限公司   本科及以上  16\n",
       "网易集团           本科及以上  11\n",
       "科大讯飞           统招本科   10\n",
       "               本科及以上  10\n",
       "...                   ..\n",
       "深圳市丰巢科技有限公司    本科及以上   1\n",
       "深圳市优必选科技股份有限公司 本科及以上   1\n",
       "深圳视见医疗科技有限公司   本科及以上   1\n",
       "猎聘             大专及以上   1\n",
       "麒麟合盛网络技术股份有限公司 本科及以上   1\n",
       "\n",
       "[100 rows x 1 columns]"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 建构  参数模板  函数\n",
    "参数_compTag_用户体验 = {'中国500强': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['155'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '2018互联网300强': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['182'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '制造业500强': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['186'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, 'AI创新成长50强 ': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['189'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '独角兽': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['130'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 
'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '上市公司': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['156'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}}\n",
    "# Empty placeholders, presumably for the planned keyword x compTag x industry parameter sets — TODO confirm;\n",
    "# nothing in this cell fills them.\n",
    "参数_compTag_行业_用户体验 = {}\n",
    "参数_中国500强_行业_用户体验 = {}\n",
    "参数_2018互联网300强_行业_用户体验 = {}\n",
    "# Target: keyword 用户体验 x compTag x industryType/industries ... x city -> full URL list (6 * 51)\n",
    "\n",
    "liepin分类 = {}\n",
    "\n",
    "# Multi-page preparation, test 1: request the 中国500强 (China Top 500) listing once.\n",
    "# NOTE(review): HTMLSession is not imported in this cell — presumably requests_html was imported in an earlier cell; verify.\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "session = HTMLSession()\n",
    "payload = 参数_compTag_用户体验['中国500强']\n",
    "r = session.get( url, params = payload)\n",
    "\n",
    "# r.url  # uncomment to inspect the final request URL\n",
    "\n",
    "# Single-page crawl + parsing.\n",
    "# A fresh HTMLSession replaces the one above; requests_liepin (defined next) sends its request through this global.\n",
    "session = HTMLSession()\n",
    "\n",
    "def requests_liepin( url, params):\n",
    "    \"\"\"Fetch one liepin search-result page and return its job cards as a DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url : str\n",
    "        Search endpoint, e.g. \"https://www.liepin.com/zhaopin/\".\n",
    "    params : dict\n",
    "        Query-string parameters sent with the GET request.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per <li> under ul.sojob-list and one column per key of\n",
    "        `dict_xpaths` (text columns first, then link columns). A field\n",
    "        that is missing from a card is stored as \"\".\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    The request is sent through the module-level `session`.\n",
    "    \"\"\"\n",
    "    # Send the caller's parameters; the global `payload` must not be read here,\n",
    "    # otherwise the `params` argument would be silently ignored.\n",
    "    r = session.get(url, params=params)\n",
    "\n",
    "    # Select the job cards first, then aim every other XPath at their descendants.\n",
    "    主要元素 = r.html.xpath('//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # XPath dictionary grouped by extraction mode: key = output column name, value = XPath.\n",
    "    dict_xpaths = {\n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]',\n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title',\n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # text_content() of the first match in each card; \"\" when the card has no match.\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            匹配 = e.xpath(_xpath_)\n",
    "            暂存结果.append(匹配[0].lxml.text_content() if 匹配 else \"\")\n",
    "        return 暂存结果\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # Concatenate the stripped text of every match in each card (\"\" when there is none).\n",
    "        # A match may be a plain string rather than an element (e.g. the '/@title' XPath above).\n",
    "        return [\n",
    "            \"\".join(x.strip() if isinstance(x, str) else x.text.strip() for x in e.xpath(_xpath_))\n",
    "            for e in 主要元素\n",
    "        ]\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # First absolute link of the first match in each card; \"\" when there is no match or no link.\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            首个匹配 = e.xpath(_xpath_, first=True)  # evaluated once per card; None when nothing matches\n",
    "            links = 首个匹配.absolute_links if 首个匹配 is not None else ()\n",
    "            暂存结果.append(next(iter(links), \"\"))\n",
    "        return 暂存结果\n",
    "\n",
    "    # Only query inside the job cards; build one column per configured field.\n",
    "    数据字典 = {k: get_e_text_content(v) for k, v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k: get_e_text(v) for k, v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k: get_e_href(v) for k, v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    return 数据\n",
    "\n",
    "# Multiple pages: crawl one result page per hot-company category (compTag) and stack them.\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = []\n",
    "for k, v in 参数_compTag_用户体验.items():\n",
    "    # `payload` stays a module-level name so that any code reading it globally\n",
    "    # sees the parameters of the category currently being crawled.\n",
    "    payload = v\n",
    "    # Use a dedicated name here: plain `df` is the parsed-URL frame that the\n",
    "    # query-parsing cell reads via df['query'], and must not be overwritten.\n",
    "    df_page = requests_liepin(url, params=payload).assign(热门公司类型=k)\n",
    "    list_df.append(df_page)\n",
    "\n",
    "# The per-page row labels are kept as-is, so they restart for every category.\n",
    "df_all = pd.concat(list_df)\n",
    "\n",
    "# Export\n",
    "df_all.to_excel(\"20春_Web数据挖掘_Week6_liepin_各热门公司类型.xlsx\", sheet_name=\"搜查结果\")\n",
    "\n",
    "# Basic pandas exploration: distinct values per column, then the number of\n",
    "# postings per (company name, education requirement) pair, largest first.\n",
    "print(df_all.nunique())\n",
    "df_all.groupby(['公司名称','edu']).agg({\"职称\":\"count\"}).sort_values(by='职称', ascending=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
